################################################################################
### Replication Code: Statisitcal Analysis 2016
################################################################################
#
# Paper: Rationalizing Protest Participation  
#
# Authors: Tim Baule, Jonathan Bothner, Maximilian Kähny
#
# Software: R version 4.4.0 using Windows
#
################################################################################

# load packages
library(haven)
library(dplyr)
library(tidyr)
library(spdep)
library(Matrix)
library(readr)
library(spatialreg)
library(sphet)
library(spatialprobit)
library(ProbitSpatial)
library(igraph)
library(stargazer)
library(vtable)

# load data
rm(list = ls())
setwd("C:/Users/timba/OneDrive - Universität Bayreuth (1)/Uni/Research General/Protest Network Project/empirics/do files and skripts/final")
anes_cc <- read_dta("anes_timeseries_2016_clean.dta")
spatial_ids <- read_csv("spatial_matrix_ids.csv")
pol_cluster_matrix_ids = read_csv("pol_cluster_matrix_ids.csv")
political_clusters_3to11 = read_csv("political_clusters_3to11.csv")


##################################################
### Data Prep Function
##################################################

### define function
data_setup = function(data, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig){
  
  # filtering of clusters
  if (decision_cluster == "none") {
    anes_trim = anes_cc
    #  } else if (decision_cluster == "pol") {
    #    anes_trim = anes_cc %>%
    #      filter(c3 == selection_cluster_pol)
    #  } else {
    #    anes_trim = anes_cc %>%
    #      filter(.[[selection_cluster_protest]] == 1)
  }
  
  
  # variables for mahalanobis distance
  covariate_names <- c("leftright", "reli", "trust", "age","educ", "inc", "volunteer", "community_service", "attend_church", "longitude", "latitude")
  Xcov <- as.matrix(anes_trim[, covariate_names])
  n <- nrow(Xcov)
  
  # Mahalanobis distance and proximity matrix
  Sigma <- cov(Xcov)
  D2 <- matrix(0, n, n)
  for (i in seq_len(n)) {
    D2[i, ] <- mahalanobis(Xcov, center = Xcov[i, ], cov = Sigma)
  }
  D <- sqrt(D2)
  invD <- 1 / D
  diag(invD) <- 0
  invD[!is.finite(invD)] <- 0
  
  # define number of neighbors
  Wbin_cc <- matrix(0, n, n)
  for (i in seq_len(n)) {
    top_neig <- order(invD[i, ], decreasing = TRUE)[1:neig]
    Wbin_cc[i, top_neig] <- 1
  }
  
  # Create spatial weights object
  Wmat_cc <- Matrix(Wbin_cc, sparse = TRUE)
  listw_cc <- mat2listw(Wmat_cc, style = "W", zero.policy = FALSE)
  stopifnot(all(rowSums(Wbin_cc) == neig))
  
  # return
  return(list(listw_cc = listw_cc, anes_trim = anes_trim))
}


##################################################
### Spatial Estimation
##################################################

### Replication of Table 8 
# vary "neig" to obtain results for specifications with different nearest neighbors


# Define final model variables
formula_str = "protest ~ male + age + I(age^2) + black + inc + educ + leftright + leftright2  + health + married + children + reli + trust + volunteer + community_service + attend_church + dummy_NE + dummy_MW + dummy_South"
# neigbors (this must be varied in order to obtain estimates for each cluster size)
neig = 15                              # spillover definition

### 1) baseline
decision_cluster = "none"              # options: "none"; "pol"; "pro"
alldata = data_setup(anes_cc, decision_cluster, selection_cluster_pol, selection_cluster_protest, neig)
listw_cc = alldata$listw_cc
anes_trim = alldata$anes_trim
# estimation
baseline_10 <- spreg(
  formula = as.formula(formula_str),
  data = anes_trim,
  listw = listw_cc,
  model = "sarar",
  het = TRUE
)
baseline_10_effects <- impacts(baseline_10, listw = listw_cc, R = 1000)



#######################################
### Results
#######################################

### Baseline
print("Spatial Coefficients")
summary(baseline_10)
print("Direct Effects, Indirect Effects")
summary(baseline_10_effects, zstats=TRUE, short =TRUE)




















